#Area Crosswalks
area_codes <- readRDS(here("State Data/area_crosswalk.RDS")) %>% 
  mutate(area_title = str_remove(area_title, " -- Statewide")) %>% 
  mutate(area_fips = case_when(
    nchar(area_fips) == 4 ~ paste("0", area_fips, sep = ""), 
    TRUE ~ area_fips
  ))

states <- data.frame(state_abbr = state.abb, area_title = state.name)

area_codes <- left_join(area_codes, states) %>% 
  mutate(state_abbr = case_when(
    area_title == "District of Columbia" ~ "DC",
    TRUE ~ state_abbr))
## Joining with `by = join_by(area_title)`
#Map Data
county_sf <- counties(cb = TRUE) %>% 
  shift_geometry(position = "outside")
## Retrieving data for the year 2021
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |                                                                      |   1%
  |                                                                            
  |=                                                                     |   1%
  |                                                                            
  |=                                                                     |   2%
  |                                                                            
  |==                                                                    |   2%
  |                                                                            
  |==                                                                    |   3%
  |                                                                            
  |===                                                                   |   4%
  |                                                                            
  |====                                                                  |   5%
  |                                                                            
  |====                                                                  |   6%
  |                                                                            
  |=====                                                                 |   7%
  |                                                                            
  |=====                                                                 |   8%
  |                                                                            
  |======                                                                |   8%
  |                                                                            
  |======                                                                |   9%
  |                                                                            
  |=======                                                               |  10%
  |                                                                            
  |========                                                              |  11%
  |                                                                            
  |=========                                                             |  12%
  |                                                                            
  |=========                                                             |  13%
  |                                                                            
  |=========                                                             |  14%
  |                                                                            
  |==========                                                            |  14%
  |                                                                            
  |==========                                                            |  15%
  |                                                                            
  |===========                                                           |  16%
  |                                                                            
  |============                                                          |  17%
  |                                                                            
  |=============                                                         |  18%
  |                                                                            
  |==============                                                        |  19%
  |                                                                            
  |==============                                                        |  20%
  |                                                                            
  |===============                                                       |  22%
  |                                                                            
  |================                                                      |  23%
  |                                                                            
  |=================                                                     |  24%
  |                                                                            
  |=================                                                     |  25%
  |                                                                            
  |==================                                                    |  25%
  |                                                                            
  |==================                                                    |  26%
  |                                                                            
  |===================                                                   |  27%
  |                                                                            
  |====================                                                  |  28%
  |                                                                            
  |====================                                                  |  29%
  |                                                                            
  |=====================                                                 |  30%
  |                                                                            
  |=====================                                                 |  31%
  |                                                                            
  |======================                                                |  31%
  |                                                                            
  |=======================                                               |  32%
  |                                                                            
  |=======================                                               |  33%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |========================                                              |  35%
  |                                                                            
  |=========================                                             |  36%
  |                                                                            
  |==========================                                            |  37%
  |                                                                            
  |===========================                                           |  38%
  |                                                                            
  |===========================                                           |  39%
  |                                                                            
  |============================                                          |  39%
  |                                                                            
  |============================                                          |  40%
  |                                                                            
  |=============================                                         |  41%
  |                                                                            
  |=============================                                         |  42%
  |                                                                            
  |==============================                                        |  42%
  |                                                                            
  |==============================                                        |  43%
  |                                                                            
  |===============================                                       |  44%
  |                                                                            
  |================================                                      |  45%
  |                                                                            
  |================================                                      |  46%
  |                                                                            
  |=================================                                     |  47%
  |                                                                            
  |=================================                                     |  48%
  |                                                                            
  |==================================                                    |  48%
  |                                                                            
  |==================================                                    |  49%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |====================================                                  |  51%
  |                                                                            
  |====================================                                  |  52%
  |                                                                            
  |=====================================                                 |  52%
  |                                                                            
  |=====================================                                 |  54%
  |                                                                            
  |======================================                                |  54%
  |                                                                            
  |======================================                                |  55%
  |                                                                            
  |=======================================                               |  55%
  |                                                                            
  |=======================================                               |  56%
  |                                                                            
  |========================================                              |  57%
  |                                                                            
  |========================================                              |  58%
  |                                                                            
  |=========================================                             |  58%
  |                                                                            
  |=========================================                             |  59%
  |                                                                            
  |==========================================                            |  60%
  |                                                                            
  |==========================================                            |  61%
  |                                                                            
  |===========================================                           |  61%
  |                                                                            
  |===========================================                           |  62%
  |                                                                            
  |============================================                          |  62%
  |                                                                            
  |============================================                          |  63%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |==============================================                        |  66%
  |                                                                            
  |===============================================                       |  67%
  |                                                                            
  |===============================================                       |  68%
  |                                                                            
  |================================================                      |  68%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |=================================================                     |  69%
  |                                                                            
  |=================================================                     |  70%
  |                                                                            
  |=================================================                     |  71%
  |                                                                            
  |==================================================                    |  71%
  |                                                                            
  |==================================================                    |  72%
  |                                                                            
  |===================================================                   |  73%
  |                                                                            
  |====================================================                  |  74%
  |                                                                            
  |====================================================                  |  75%
  |                                                                            
  |=====================================================                 |  75%
  |                                                                            
  |=====================================================                 |  76%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |=======================================================               |  78%
  |                                                                            
  |=======================================================               |  79%
  |                                                                            
  |========================================================              |  80%
  |                                                                            
  |=========================================================             |  81%
  |                                                                            
  |=========================================================             |  82%
  |                                                                            
  |==========================================================            |  82%
  |                                                                            
  |==========================================================            |  83%
  |                                                                            
  |===========================================================           |  84%
  |                                                                            
  |===========================================================           |  85%
  |                                                                            
  |============================================================          |  85%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |=============================================================         |  87%
  |                                                                            
  |=============================================================         |  88%
  |                                                                            
  |==============================================================        |  88%
  |                                                                            
  |===============================================================       |  89%
  |                                                                            
  |===============================================================       |  90%
  |                                                                            
  |================================================================      |  91%
  |                                                                            
  |================================================================      |  92%
  |                                                                            
  |=================================================================     |  93%
  |                                                                            
  |==================================================================    |  94%
  |                                                                            
  |==================================================================    |  95%
  |                                                                            
  |===================================================================   |  95%
  |                                                                            
  |===================================================================   |  96%
  |                                                                            
  |====================================================================  |  97%
  |                                                                            
  |====================================================================  |  98%
  |                                                                            
  |===================================================================== |  98%
  |                                                                            
  |===================================================================== |  99%
  |                                                                            
  |======================================================================|  99%
  |                                                                            
  |======================================================================| 100%
states_sf <- states(cb = TRUE, resolution = "20m") %>%
  shift_geometry(position = "outside")
## Retrieving data for the year 2021
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |=============                                                         |  19%
  |                                                                            
  |========================                                              |  34%
  |                                                                            
  |=============================================                         |  64%
  |                                                                            
  |================================================                      |  69%
  |                                                                            
  |======================================================                |  77%
  |                                                                            
  |============================================================          |  86%
  |                                                                            
  |======================================================================| 100%
#Convert county data to a table
county_data <- counties_leaflet %>% 
  as_tibble() %>% 
  select(STATEFP, COUNTYFP) %>% 
  mutate(area_fips = paste(STATEFP, COUNTYFP, sep = ""), 
         st = as.numeric(STATEFP))
#Get center of each county
county_centers <- counties_leaflet %>% 
  filter(str_detect(NAME, "Mariana", negate = TRUE)) %>% 
  st_centroid() %>% 
  sf::st_transform('+proj=longlat +datum=WGS84')
## Warning: st_centroid assumes attributes are constant over geometries
#Load Existing Data
qcew_3digits <- readRDS(here("State Data/qcew_3digit.RDS")) %>% 
  mutate(industry_desc = substring(industry_title, 11))
#Simplify to get industry codes
ind_3digit <- qcew_3digits %>% 
  select(naics_3digit = ind_code, naics_3digit_label = industry_desc) %>% 
  distinct() %>% 
  arrange(naics_3digit_label)
#Define colors 
emp_ind_vector <- c(brewer.pal(9, "Greys")[4], "#cf4633", "#BEAED4", "#FDC086", "#FDBF6F",  "#386CB0", "#FB8072", brewer.pal(9, "Greys")[3], brewer.pal(9, "Greys")[8], "#80B1D3", "#F0027F",  "#4DAF4A", "#F1E2CC", "#6A3D9A", "#E78AC3", "#CBD5E8", "#666666", brewer.pal(9, "Greys")[5],  brewer.pal(9, "Greys")[6], "#A65628", brewer.pal(9, "Greys")[7])

Introduction

This document cleans and summarizes input-output use and supply tables.

#read data
cbp_2019 <- read.csv(here("County Data/5_digit_naics_2019.csv")) %>% 
  mutate(area_fips = str_sub(GEO_ID, -5)) %>% 
  left_join(county_data) %>% 
  left_join(area_codes %>% select(st, state_abbr)) %>% 
  mutate(emp_num = as.numeric(EMP))
## Joining with `by = join_by(area_fips)`
## Joining with `by = join_by(st)`
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `emp_num = as.numeric(EMP)`.
## Caused by warning:
## ! NAs introduced by coercion

Data Ingestion

USE Table

use_table_raw <- read.csv(here("Input Output Data/USE_TABLE_2017/2017-Table 1.csv"))

We create a crosswalk between industry codes and their descriptions.

use_cols <- colnames(use_table_raw)

ind_codes <- use_table_raw %>% 
  head(1) %>% 
  pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "industry_code") %>% 
  select(industry_code, industry_desc)

The first step will be to convert this data into a long data format.

use_table_long <- use_table_raw[-1,] %>% 
  pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "value", values_drop_na = TRUE) %>%
  mutate(value_clean = str_remove_all(value, "[[:punct:]]"),
    value_num = as.numeric(value_clean)) %>% 
  filter(!is.na(value_num)) %>% 
  rename(commodity_code = 1, commodity_desc = 2) %>% 
  left_join(ind_codes)
## Joining with `by = join_by(industry_desc)`

The resulting dataframe details how individuals industries use specific commodities. E.G the DEMAND for specific INPUTS from industires. We add a few additional descriptive columns to be able to segment and sort our data. Values in this data are then the total amount in millions of dollars of specific inputs used by specific industries.

use_table_clean <- use_table_long %>% 
  mutate(com_code_3 = as.numeric(str_sub(commodity_code, 1, 3)),
         naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>% 
  left_join(ind_3digit) 
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `com_code_3 = as.numeric(str_sub(commodity_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Joining with `by = join_by(naics_3digit)`

We note that some of the commodity codes refer to aggregate measures. The same is true for some of the industry codes.

agg_coms <- use_table_clean %>% select(commodity_code, commodity_desc, com_code_3) %>% distinct() %>% filter(is.na(com_code_3))
agg_coms <- agg_coms[-c(1:2),]

agg_coms %>% select(1:2)
## # A tibble: 20 × 2
##    commodity_code commodity_desc                                            
##    <chr>          <chr>                                                     
##  1 S00500         Federal general government (defense)                      
##  2 S00600         Federal general government (nondefense)                   
##  3 S00102         Other federal government enterprises                      
##  4 GSLGE          State and local government (educational services)         
##  5 GSLGH          State and local government (hospitals and health services)
##  6 GSLGO          State and local government (other services)               
##  7 S00203         Other state and local government enterprises              
##  8 S00401         Scrap                                                     
##  9 S00402         Used and secondhand goods                                 
## 10 S00300         Noncomparable imports                                     
## 11 S00900         Rest of the world adjustment                              
## 12 T005           Total intermediate inputs                                 
## 13 V00100         Compensation of employees                                 
## 14 T00OTOP        Other taxes on production                                 
## 15 V00300         Gross operating surplus                                   
## 16 VABAS          Value added (basic value)                                 
## 17 T018           Total industry output (basic value)                       
## 18 T00TOP         Plus: Taxes on products and imports                       
## 19 T00SUB         Less: Subsidies                                           
## 20 VAPRO          Value added (producer value)

These codes are useful in informing aggregate measures, such as total use of inputs, total use of imports. In addition, the USE table contains valuable information about total value added by industry. Total commodity output and total industry output are both found in the USE table.

We see that the use table provides a wide variety of measures about both commodity useage, as well as industry output.

agg_ind <- use_table_clean %>% select(industry_code, industry_desc, naics_3digit) %>% distinct() %>% filter(is.na(naics_3digit)) 
agg_ind <- agg_ind[-c(10),]

agg_ind %>% select(1:2)
## # A tibble: 33 × 2
##    industry_code industry_desc                                             
##    <chr>         <chr>                                                     
##  1 S00600        Federal.general.government..nondefense.                   
##  2 T001          Total.Intermediate                                        
##  3 F03000        Change.in.private.inventories                             
##  4 F04000        Exports.of.goods.and.services                             
##  5 T019          Total.use.of.products                                     
##  6 GSLGE         State.and.local.government..educational.services.         
##  7 GSLGH         State.and.local.government..hospitals.and.health.services.
##  8 GSLGO         State.and.local.government..other.services.               
##  9 F01000        Personal.consumption.expenditures                         
## 10 4B0000        All.other.retail                                          
## # ℹ 23 more rows

Create Aggregate Measures

Because of the way that these databases are constructred, the agg_ind dataframe above refers to commodity level summaries, while the agg_com dataframe above refers to industry level summaries. We spend some time at the moment to create some commodity and industry level benchmarks.

agg_coms %>% 
  filter(commodity_code %in% c("T005", "S00300", "VABAS", "T018", "VAPRO")) %>% 
  select(1:2)
## # A tibble: 5 × 2
##   commodity_code commodity_desc                     
##   <chr>          <chr>                              
## 1 S00300         Noncomparable imports              
## 2 T005           Total intermediate inputs          
## 3 VABAS          Value added (basic value)          
## 4 T018           Total industry output (basic value)
## 5 VAPRO          Value added (producer value)

We begin with creating benchmarks for value added, total output by industry, use of imports, and use of intermediary inputs.

ind_agg <- use_table_clean %>% 
  filter(commodity_code %in% c("T005", "S00300", "VABAS", "T018", "VAPRO")) %>% 
  select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>% 
  pivot_wider(id_cols = c(industry_code, industry_desc), names_from = commodity_code, values_from = value_num) 

colnames(ind_agg) <- c("industry_code", "industry_desc", "imports", "intermediary_inputs", "value_added", "industry_output", "prod_value_added")

This dataframe contains information, aggregated at the INDUSTRY level, about the total of use of inputs by industries, AS WELL as the overall OUTPUT and VALUE ADDED of each industry.

We can immediately produce some summary statistics about overall industry activity here. Again, this work focuses specifically on the manufacturing sector.

Industry Level Manufacturing Sub-Sector Details

manf_ind <- ind_agg %>% 
  mutate(naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>% 
  left_join(ind_3digit) %>% 
  filter(naics_3digit >= 300 & naics_3digit < 400) %>% 
  #FIX MISSING VALUES WITH 0
  mutate(across(.cols = c(3:7), ~ case_when(
    is.na(.) ~ 0, 
    TRUE ~ .)), 
    log_ind = log(industry_output), 
    log_value = log(value_added), 
    log_inputs = log(intermediary_inputs), 
    val_ratio = value_added/industry_output, 
    input_ratio = intermediary_inputs/industry_output, 
    log_imports = log(imports), 
    import_ratio = imports/intermediary_inputs) 
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `naics_3digit = as.numeric(str_sub(industry_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## Joining with `by = join_by(naics_3digit)`
ind_pal <- emp_ind_vector[-c(1, 17, 9)]

Manufacturing Sub-Sector Total Output

ind_output <- manf_ind %>% 
  filter(industry_output != 0) %>% 
  ggplot() + 
  geom_density_ridges(aes(x = industry_output, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  scale_x_continuous(labels = scales::comma) + 
  guides(fill = "none") + 
  labs(x = "2017 Total Industry Output (Millions of $)", y = "3 Digit NAICS Group") + 
  theme_bw() + 
  axis_theme

ind_output
## Picking joint bandwidth of 9620

ind_output_log <-  manf_ind %>% 
  filter(industry_output != 0) %>% 
  ggplot() + 
  geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  scale_x_continuous(trans = scales::log_trans()) + 
  guides(fill = "none") + 
  labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") + 
  theme_bw() + 
  axis_theme

ind_output_log
## Picking joint bandwidth of 0.0416

ind_output_comp <- ind_output + ind_output_log

ind_output_comp
## Picking joint bandwidth of 9620
## Picking joint bandwidth of 0.0416

Manufacturing Sub-Sector Value Added

We might also be interested in the distribution of different measures of value added across manufacturing sectors. This distribution is close to the total output distribution, but has slightly flatter peaks, and some shifted distributions.

ind_value_add <- manf_ind %>% 
  ggplot() + 
  geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  scale_x_continuous(trans = scales::log_trans()) + 
  guides(fill = "none") + 
  labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") + 
  theme_bw() + 
  axis_theme

ind_value_add
## Picking joint bandwidth of 0.0464

ind_value_add + 
  geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.5) + 
  scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.398
## 
## Picking joint bandwidth of 0.397

However, when we examine the ratio of value added per industry output, we see a very different distribution across manufacturing sub-sectors, with certain sub-sectors housing industries with much higher value added.

value_add_ratio <- manf_ind %>% 
  ggplot() + 
  geom_density_ridges(aes(x = val_ratio, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  guides(fill = "none") + 
  labs(x = "Value Added Per Output, 2017", y = "") + 
  theme_bw() + 
  axis_theme

value_add_ratio
## Picking joint bandwidth of 0.0311

Manufacturing Sub-Sector Total Use of Inputs

We see that the distribution of use of inputs follows the distribution of total output very closely.

input_sum <- manf_ind %>% 
  ggplot() + 
  geom_density_ridges(aes(x = log_inputs, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  scale_x_continuous(trans = scales::log_trans()) + 
  guides(fill = "none") + 
  labs(x = "Log of 2017 Total Industry Output (Millions of $)", y = "") + 
  theme_bw() + 
  axis_theme + 
  scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16))
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
input_sum
## Picking joint bandwidth of 0.388

input_sum + 
  geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.8) + 
  geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.3, color = "white") + 
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16)) +
  theme_dark()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.388
## 
## Picking joint bandwidth of 0.398
## 
## Picking joint bandwidth of 0.397

However, when examining the intensity of use of intermediary inputs (e.g. the ratio of intermediary inputs to output), the distribution of each 3 Digit NAICS Manufacturing sector changes substantially.

input_ratio <- manf_ind %>% 
  ggplot() + 
  geom_density_ridges(aes(x = input_ratio, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  guides(fill = "none") + 
  labs(x = "2017 Ratio of Intermeidary Inputs to Industry Output", y = "") + 
  theme_bw() + 
  axis_theme

input_ratio
## Picking joint bandwidth of 0.0311

input_ratio + 
  geom_density_ridges(aes(x = val_ratio, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.7)
## Picking joint bandwidth of 0.0311
## Picking joint bandwidth of 0.0311

Manufacturing Sub-Sector Total Use of Imports

We conclude these summary statistics by examining the intensity of use of imports by manufacturing sub-sector.

import_sum <- manf_ind %>% 
  filter(imports != 0) %>% 
  ggplot() + 
  geom_density_ridges(aes(x = log_imports, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  guides(fill = "none") + 
  labs(x = "2017 Log Imports", y = "") + 
  theme_bw() + 
  axis_theme

import_sum
## Picking joint bandwidth of 0.508

input_sum + 
  geom_density_ridges(aes(x = log_value, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.8) + 
  geom_density_ridges(aes(x = log_ind, y = naics_3digit_label, fill = naics_3digit_label), alpha = 0.3, color = "white") + 
  geom_density_ridges(data = manf_ind %>% 
  filter(imports != 0), 
                        aes(x = log_imports, y = naics_3digit_label, fill = naics_3digit_label), color = "grey") + 
scale_x_continuous(breaks = c(0,6, 8, 10, 12, 14, 16), labels = c(0,6, 8, 10, 12, 14, 16)) +
  theme_dark()
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Picking joint bandwidth of 0.388
## 
## Picking joint bandwidth of 0.398
## 
## Picking joint bandwidth of 0.397
## 
## Picking joint bandwidth of 0.508

import_rat <- manf_ind %>% 
  ggplot() + 
  geom_density_ridges(aes(x = import_ratio, y = naics_3digit_label, fill = naics_3digit_label)) + 
  scale_fill_manual(values = ind_pal) + 
  guides(fill = "none") + 
  labs(x = "2017 Ratio of Imports to Intermediate Inputs", y = "") + 
  theme_bw() + 
  axis_theme

import_rat
## Picking joint bandwidth of 0.00136

We see that the ratio of intermediary inputs to total output and the ratio of value added to total output, appear to almost mirror each other, supporting the observation that because of how input-output accounting is done, the ratio of intermediary inputs to total output also captures value added in the final production step.

Commodity Level Details

We now create some commodity level summaries, detailing the total use of specific commodities as intermediary inputs, compared to final products. In addition, we include measures of how many of these commodities are exported,

com_agg <- use_table_clean %>% 
  filter(industry_code %in% c("T001", "F04000", "T019")) %>% 
  select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>% 
  pivot_wider(id_cols = c(commodity_code, commodity_desc), names_from = industry_desc, values_from = value_num) 

colnames(com_agg) <- c("commodity_code", "commodity_desc", "intermediate", "exports", "total_use")

This dataframe has COMMODITY level summaries. We can use commodity level summaries to control for the input-intensity of a given commodity. ****INSERT HERE: Commodities that have a higher share of intermediate use to total use, are

Then, we can look at the use of specific commodities by industries to understand which industries are using relatively input-intensive *****

Use of Inputs by Manufacturing Sub-Sectors

We now have aggregate commodity and industry level summaries, and can simplify our original dataframe. We will also need to perform similar aggregation measures in the supply table (the make table)

At this point, we might also want to separate out our commodities into some broad categories. While this categorization might be too granular, it at least allows us some baseline to assess industry use of specific types of inputs. This categorization is based on the NAICS codes used in the SUPPLY/MAKE tables, and roughly varies at the 3-digit level. A few specific inputs, such as R&D, are called out.

use_table_final <- use_table_clean %>% 
  filter(!is.na(com_code_3), !is.na(naics_3digit)) %>% 
  mutate(input_type = case_when(
    com_code_3 < 113 ~ "Agriculture and Farming", 
    com_code_3 == 113 | com_code_3 == 115 ~ "Forestry and Logging", 
    com_code_3 == 114 ~ "Fishing, Hunting, Trapping", 
    com_code_3 == 211 ~ "Oil and Gas Extraction", 
    com_code_3 == 212 ~ "Metal Mining", 
    com_code_3 == 213 ~ "Support for Oil, Gas, and Mining", 
    com_code_3 == 221 ~ "Electricity, Gas, Water, Sewage, and other systems", 
    com_code_3 >= 300 & com_code_3 < 400 ~ "Manufactured Input", 
    com_code_3 >= 400 & com_code_3 < 460 ~ "Other wholesalers, suppliers, retailers", 
    com_code_3 >= 481 & com_code_3 < 490 ~ "Transportation", 
    com_code_3 >= 491 & com_code_3 < 493 ~ "Postal Service and other messaging", 
    com_code_3 >= 500 & com_code_3 < 600 ~ "Software, Information, Financial, Legal, and other services", 
    com_code_3 == 611 ~ "Junior colleges, colleges, universities, professional schools, and other educational services", 
    com_code_3 >= 621 & com_code_3 < 700 ~ "Hospitals, medical and diagnostic labs, and other health care services", 
    com_code_3 == 811 | com_code_3 == 230 ~ "Maintenance & Repair", 
    com_code_3 == 813 ~ "Grantmaking and Philanthropy", 
    TRUE ~ "Other inputs (e.g. arts & recreation; accomodation, etc.)"
  )) %>%
  mutate(input_type = case_when(
    commodity_code == "423100" ~ "Motor vehicle and motor vehicle parts and supplies",
    commodity_code == "423800" ~ "Machinery, Equipment, Supplies", 
    commodity_code == "424700" ~ "Petroleum and Petroleum Products", 
    commodity_code == "541700" ~ "Scientific research and development services", 
    TRUE ~ input_type
  )) 
#Create dataframe for ordering for later

com_order <- use_table_final %>% 
  group_by(input_type) %>% 
  reframe(com_sum = sum(value_num, na.rm = TRUE)) %>%
  arrange(input_type) %>% 
  mutate(alpha_order = seq(n())) %>%
  arrange(desc(com_sum)) %>% 
  mutate(num_order = seq(n()))

Summary Statistics

We now start by creating some high level summary statistics by industry, focusing specifically on the manufacturing sector. Previous work has categorized 6-digit NAICS sectors into traded, or local (Delgado et al., 2014); or “supply-chain” industries (Delgado and Mills, 2020), but explicitly does not consider the supply chain of specific companies (e.g. GM), or specific industries. In this work, we seek to map out specific supply chains for specific industries, and examine how supply chains organize differently across regions. In the above section, we already provided summary statistics about the total use of inputs by industries. Now, we want to specifically focus on the type of inputs that manufacturers require.

  1. Manufacturing Sub-sectors by use of inputs
manf_inputs <- use_table_final %>% 
  filter(naics_3digit >= 300 & naics_3digit < 400) %>% 
  select(-c(value, value_clean)) %>% 
  left_join(manf_ind) %>% 
  mutate(input_share = value_num / intermediary_inputs) 
## Joining with `by = join_by(industry_desc, industry_code, naics_3digit,
## naics_3digit_label)`

In total, we have, ‘r manf_inputs %>% select(industry_desc) %>% distinct() %>% nrow()’ (232) different types of manufacturing, at the 6-digit level. To make sense of the variation in use of inputs by manufacturing sub-sector, we start with a 3-level by 3-level summary, acknowledging that there is substantial variation in use of inputs by the different manufacturing sector.

input_list <- manf_inputs %>% 
  select(input_type) %>% 
  distinct() %>% 
  unlist()
input_agg_share <- manf_inputs %>% 
  group_by(industry_code, industry_desc, naics_3digit_label, input_type) %>% 
  reframe(input_share = sum(input_share, na.rm = TRUE), intermediary_inputs, value = sum(value_num, na.rm = TRUE)) %>% 
  distinct() 
manf_codes <- manf_inputs %>% 
  select(naics_3digit_label) %>% 
  distinct() %>% 
  unlist()
ExpandColorsLIGHT <- function(colors, n, steps = 11){
  if(n <= steps){
    suppressWarnings({
      sapply(colors, function(x){colorRampPalette(c(x, "#FFFFFF"))(steps)}) %>% 
        as.data.frame() %>% 
        filter(row_number() <= n) %>% 
        gather(key = original.color, value = expanded.color)
    })
  }else{
    warning("Select n < steps!")
  }
}
ExpandColorsDARK <- function(colors, n, steps = 11){
  if(n <= steps){
    suppressWarnings({
      sapply(colors, function(x){colorRampPalette(c(x, "#000000"))(steps)}) %>% 
        as.data.frame() %>% 
        filter(row_number() <= n) %>% 
        gather(key = original.color, value = expanded.color)
    })
  }else{
    warning("Select n < steps!")
  }
}
input_list %>% unname()
##  [1] "Agriculture and Farming"                                                                      
##  [2] "Forestry and Logging"                                                                         
##  [3] "Fishing, Hunting, Trapping"                                                                   
##  [4] "Oil and Gas Extraction"                                                                       
##  [5] "Metal Mining"                                                                                 
##  [6] "Electricity, Gas, Water, Sewage, and other systems"                                           
##  [7] "Maintenance & Repair"                                                                         
##  [8] "Manufactured Input"                                                                           
##  [9] "Motor vehicle and motor vehicle parts and supplies"                                           
## [10] "Other wholesalers, suppliers, retailers"                                                      
## [11] "Machinery, Equipment, Supplies"                                                               
## [12] "Petroleum and Petroleum Products"                                                             
## [13] "Transportation"                                                                               
## [14] "Postal Service and other messaging"                                                           
## [15] "Other inputs (e.g. arts & recreation; accomodation, etc.)"                                    
## [16] "Software, Information, Financial, Legal, and other services"                                  
## [17] "Scientific research and development services"                                                 
## [18] "Junior colleges, colleges, universities, professional schools, and other educational services"
## [19] "Grantmaking and Philanthropy"
input_cols <- c("#4DAF4A", "#D8CBB7", "#C7E1C6", "#7E5F37", "#252525", "#F3C38F" ,"#80B1D3", "#597B93", "#954D24", "#969696", "#386CB0","#E78AC3", "#B1865D", "#D9D9D9", "#BDBDBD", "#cf4633", "#FB8072" ,  "#F1E2CC",  "#703933") 
input_col_map <- data.frame(input_type = input_list %>% unname(), input_cols) %>% 
  mutate(tal = seq(n()))

com_order <- left_join(input_col_map, com_order)
## Joining with `by = join_by(input_type)`
# Map Inputs to Colors
get_col_vec <- function(data){ 
  data %>% 
    select(input_type) %>% 
    distinct() %>% 
    unlist()
  }
#Function to get appropriate input colors
get_col_match <- function(data){ 
  
 col_vec <- get_col_vec(data)
 
 col_out <- input_col_map %>% 
   filter(input_type %in% col_vec) %>% 
   select(input_cols) %>% 
   unlist() %>% 
   unname()
 
  return(col_out)
  
}
make_map <- function(number){ 
  
  df <- input_agg_share %>% 
    filter(naics_3digit_label %in% manf_codes[number], !is.na(input_share)) %>% 
    mutate(val_round = round(input_share, 5)) %>% 
    filter(val_round > 0) %>% 
    group_by(input_type) %>% 
    mutate(count = n()) %>% 
    filter(count >= 3) %>% 
    ungroup() %>% 
    left_join(com_order) 
  
  col_vec_1 <- df %>% 
    get_col_match()
  
  # col_vec_2 <- df %>% 
  #   filter(count <3) %>% 
  #   get_col_match()
  
  df %>% 
    ggplot(aes(x = input_share, y = reorder(input_type, -num_order), fill = reorder(input_type, tal))) + 
      geom_density_ridges(alpha = 0.7, jittered_points = TRUE, point_alpha=1,point_shape=21) +
      geom_text(data = df %>% select(input_type, num_order, tal) %>% distinct(), aes(x = 0.35, y = reorder(input_type, -num_order), label = input_type), color = "black", nudge_y = -.2) + 
      scale_fill_manual(values = col_vec_1, guide = "none") +
    # ggnewscale::new_scale_fill() + 
      # geom_point(data = df %>% filter(count <3), aes(), shape = 21) + 
      # scale_fill_manual(values = col_vec_2, guide = "none") + 
      theme_bw() + 
      labs(x = "", y = "", title = manf_codes[number], fill = "") 
  
  }
make_map_simple <- function(number){ 
  
  df <- input_agg_share %>% 
    filter(naics_3digit_label %in% manf_codes[number], !is.na(input_share)) %>% 
    # group_by(input_type) %>% 
    # reframe(inputs_total = sum(intermediary_inputs), individual_input = sum(value)) %>% 
    # mutate(input_share = individual_input / inputs_total) %>% 
    left_join(com_order) 
  
  col_vec_1 <- df %>% 
    get_col_match()
  
  # col_vec_2 <- df %>% 
  #   filter(count <3) %>% 
  #   get_col_match()
  
  df %>% 
    ggplot(aes(x = input_share, y = reorder(input_type, -num_order), fill = reorder(input_type, tal)), group = industry_desc) + 
    geom_col(alpha = 0.7, color = "black", position = "stack") + 
      geom_text(data = df %>% select(input_type, num_order, tal) %>% distinct(), aes(x = 0.35, y = reorder(input_type, -num_order), label = input_type), color = "black", nudge_y = -.2) + 
      scale_fill_manual(values = col_vec_1, guide = "none") +
    guides(color = "none") + 
    # ggnewscale::new_scale_fill() + 
      # geom_point(data = df %>% filter(count <3), aes(), shape = 21) + 
      # scale_fill_manual(values = col_vec_2, guide = "none") + 
      theme_bw() + 
      labs(x = "", y = "", title = manf_codes[number], fill = "") + 
    theme(legend.position = "bottom")
  
  }
manf_codes %>% unname()
##  [1] "Food manufacturing"                                          
##  [2] "Chemical manufacturing"                                      
##  [3] "Beverage and tobacco product manufacturing"                  
##  [4] "Miscellaneous manufacturing"                                 
##  [5] "Textile mills"                                               
##  [6] "Textile product mills"                                       
##  [7] "Wood product manufacturing"                                  
##  [8] "Nonmetallic mineral product manufacturing"                   
##  [9] "Furniture and related product manufacturing"                 
## [10] "Apparel manufacturing"                                       
## [11] "Leather and allied product manufacturing"                    
## [12] "Paper manufacturing"                                         
## [13] "Printing and related support activities"                     
## [14] "Plastics and rubber products manufacturing"                  
## [15] "Primary metal manufacturing"                                 
## [16] "Fabricated metal product manufacturing"                      
## [17] "Transportation equipment manufacturing"                      
## [18] "Petroleum and coal products manufacturing"                   
## [19] "Electrical equipment, appliance, and component manufacturing"
## [20] "Machinery manufacturing"                                     
## [21] "Computer and electronic product manufacturing"
food_manf <- make_map(1) 
## Joining with `by = join_by(input_type)`
chem_manf <- make_map(2) 
## Joining with `by = join_by(input_type)`
bev_manf <- make_map(3) 
## Joining with `by = join_by(input_type)`
misc_manf <- make_map(4)
## Joining with `by = join_by(input_type)`
text_manf <- make_map(5) 
## Joining with `by = join_by(input_type)`
text_prod_manf <- make_map(6)
## Joining with `by = join_by(input_type)`
wood_manf <- make_map(7) 
## Joining with `by = join_by(input_type)`
non_metal_min_manf <- make_map(8) 
## Joining with `by = join_by(input_type)`
furn_manf <- make_map(9) 
## Joining with `by = join_by(input_type)`
apparel_manf <- make_map_simple(10) 
## Joining with `by = join_by(input_type)`
leather_manf <- make_map_simple(11) 
## Joining with `by = join_by(input_type)`
paper_manf <- make_map(12) 
## Joining with `by = join_by(input_type)`
printing <- make_map_simple(13) 
## Joining with `by = join_by(input_type)`
plastics_manf <- make_map(14)
## Joining with `by = join_by(input_type)`
metal_manf <- make_map(15)
## Joining with `by = join_by(input_type)`
fab_metal_manf <- make_map(16)
## Joining with `by = join_by(input_type)`
transport_equipment_manf <- make_map(17) 
## Joining with `by = join_by(input_type)`
petro_manf <- make_map(18)
## Joining with `by = join_by(input_type)`
ee_appliance_component_manf <- make_map(19)
## Joining with `by = join_by(input_type)`
machine_manf <- make_map(20)
## Joining with `by = join_by(input_type)`
comp_ee_manf <- make_map(21)
## Joining with `by = join_by(input_type)`

Across food, miscellaneous, and beverage manufacturing, the distribution of the use of manufactured inputs is fairly flat, but wide for food manufacturing, higher and shifted right for miscellaneous manufacturing, as well as beverage and tobacco manufacturing. Miscellaneous manufacturing and beverage and tobacco manufacturing both also have a higher ratio of software, financial, legal, and other information services. The use of agricultural/farming as well fishing/hunting inputs is also not surprising.

g1 <- food_manf + theme(axis.text.y = element_blank())
g2 <- misc_manf + theme(axis.text.y = element_blank())
g3 <- bev_manf + theme(axis.text.y = element_blank())

gA <- g1 + g2 + g3

gA
## Picking joint bandwidth of 0.0193
## Picking joint bandwidth of 0.00642
## Picking joint bandwidth of 0.0191

Across chemical, plastics & rubber, and petroleum and coal product manufacturing, both chemical as well as petroleum and coal product manufacturing use mined inputs. One of the subsectors in petroleum and coal product manufacturing uses fewer manufactured inputs. There is some use of warehousing in chemical manufacturing, and both chemical manufacturing as well as plastics and rubber product manufacturing have some sub-sectors with a high use of electricity, water, sewage, gas, and other services.

g4 <- chem_manf + theme(axis.text.y = element_blank()) 
g5 <- plastics_manf + theme(axis.text.y = element_blank())
g6 <- petro_manf + theme(axis.text.y = element_blank())

gB <- g4 + g5 + g6

gB
## Picking joint bandwidth of 0.00818
## Picking joint bandwidth of 0.00559
## Picking joint bandwidth of 0.0229

Across paper, printing, and leather manufacturing, the use of manufactured inputs, as well as electricity, gas, sewage, water and other services, is rather high, along with the use of software and other financial services. Unsurprisingly, paper manufacturing has high use of forestry and logging. Printing and leather manufacturing only have two and one sub-industry, respectively.

g7 <- paper_manf + theme(axis.text.y = element_blank()) 
g8 <- printing + theme(axis.text.y = element_blank()) 
g9 <- leather_manf + theme(axis.text.y = element_blank()) 

gC <- g7 + g8 + g9

gC
## Picking joint bandwidth of 0.0136

Wood product manufacturing consumes forestry and logging inputs, and not all sub-sectors have a high useage of manufactured inputs, while furniture manufacturing has a high use of manufactured inputs. There is only one sub-sector in apparel manufacturing.

g10 <- wood_manf + theme(axis.text.y = element_blank()) 
g11 <- furn_manf + theme(axis.text.y = element_blank()) 
g12 <- apparel_manf + theme(axis.text.y = element_blank()) 

gD <- g10 + g11 + g12

gD
## Picking joint bandwidth of 0.0157
## Picking joint bandwidth of 0.00512

Across textile mills, textile product mills, and machinery manufacturing, the use of manufactured inputs is high.

g13 <- text_manf + theme(axis.text.y = element_blank()) 
g14 <- text_prod_manf + theme(axis.text.y = element_blank()) 
g15 <- machine_manf + theme(axis.text.y = element_blank()) 

gE <- g13 + g14 + g15

gE
## Picking joint bandwidth of 0.015
## Picking joint bandwidth of 0.0106
## Picking joint bandwidth of 0.00622

Across fab-metal manufacturing, metal manufacturing, and non-metallic minerals manufacturing, the use of metal mining is high in the later two, the use of manufactured inputs is high in fabricated metal manufacturing and more distributed in the other two, and both of the later two also have some subsectors that use eletricity, gas, water, sewage, and other services.

g16 <- fab_metal_manf + theme(axis.text.y = element_blank()) 
g17 <- metal_manf + theme(axis.text.y = element_blank()) 
g18 <- non_metal_min_manf + theme(axis.text.y = element_blank()) 

gF <- g16 + g17 + g18

gF
## Picking joint bandwidth of 0.00442
## Picking joint bandwidth of 0.0177
## Picking joint bandwidth of 0.0173

Across electricial equipment; computer and electonic, and transportation equipment manufacturing, we again see heterogeneity in the use of software type services, as well as manufactured inputs. Electrical equipment as well as computer and electronic manufacturing both use more wholesalers.

g19 <- ee_appliance_component_manf + theme(axis.text.y = element_blank()) 
g20 <- comp_ee_manf + theme(axis.text.y = element_blank()) 
g21 <- transport_equipment_manf + theme(axis.text.y = element_blank()) 

gG <- g19 + g20 + g21 

gG
## Picking joint bandwidth of 0.00625
## Picking joint bandwidth of 0.00909
## Picking joint bandwidth of 0.00546

Overall, we can make some broad observations above the distribution of the use of inputs by 6-digit manufacturing sub-sectors. There is wide variation in the use of software, information, financial, legal, and other such services across sub-sectors. Overall, as 3-digit level industries use specific inputs, the heterogeneity of use of specific types of inputs also appears to increase as well.

Other figures to create: - use of imports by manufacturing industry? - total use of intermediary inputs by manufacturing industry?

Measures we might want to calculate or use: - % of industry total inputs a specific commodity is (this measure should preceed the following two steps)

Outcome oriented: what do we want? Relative measures of intensity of use of intermediate materials by subsector??? Soemthing else?? BLEeeh….. very difficult.

MAKE TABLE

supply_table_raw <- read.csv(here("Input Output Data/SUPPLY_TABLE_2017/2017-Table 1.csv"))

We create a crosswalk between industry codes and their descriptions.

supply_cols <- colnames(supply_table_raw)

sup_ind_codes <- supply_table_raw %>% 
  head(1) %>% 
  pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "industry_code") %>% 
  select(industry_code, industry_desc)

The first step will be to convert this data into a long data format.

supply_table_long <- supply_table_raw[-1,] %>% 
  pivot_longer(-c(1,2), names_to = "industry_desc", values_to = "value", values_drop_na = TRUE) %>%
  mutate(value_clean = str_remove_all(value, "[[:punct:]]"),
    value_num = as.numeric(value_clean)) %>% 
  filter(!is.na(value_num)) %>% 
  rename(commodity_code = 1, commodity_desc = 2) %>% 
  left_join(sup_ind_codes)
## Joining with `by = join_by(industry_desc)`

The resulting dataframe details how individuals industries MAKE specific commodities. E.G the SUPPLY for specific OUTPUTS from industries. Total output from industry are given by the “diagonal” entry (i.e commodity code = industry code).

Again, we add a few additional descriptive columns to be able to segment and sort our data. Values in this data are then the total amount in millions of dollars of specific outputs produced by specific industries.

supply_table_clean <- supply_table_long %>% 
  mutate(com_code_3 = as.numeric(str_sub(commodity_code, 1, 3)),
         naics_3digit = as.numeric(str_sub(industry_code, 1, 3))) %>% 
  left_join(ind_3digit) 
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `com_code_3 = as.numeric(str_sub(commodity_code, 1, 3))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
## Joining with `by = join_by(naics_3digit)`

As above, we note that some of the commodity codes refer to aggregate measures. The same is true for some of the industry codes.

sup_agg_coms <- supply_table_clean %>% select(commodity_code, commodity_desc, com_code_3) %>% distinct() %>% filter(is.na(com_code_3))
sup_agg_coms <- sup_agg_coms[-c(1:2),]

sup_agg_coms %>% select(1:2)
## # A tibble: 13 × 2
##    commodity_code commodity_desc                                            
##    <chr>          <chr>                                                     
##  1 52A000         Monetary authorities and depository credit intermediation 
##  2 S00500         Federal general government (defense)                      
##  3 S00600         Federal general government (nondefense)                   
##  4 S00102         Other federal government enterprises                      
##  5 GSLGE          State and local government (educational services)         
##  6 GSLGH          State and local government (hospitals and health services)
##  7 GSLGO          State and local government (other services)               
##  8 S00203         Other state and local government enterprises              
##  9 S00401         Scrap                                                     
## 10 S00402         Used and secondhand goods                                 
## 11 S00300         Noncomparable imports                                     
## 12 S00900         Rest of the world adjustment                              
## 13 T017           Total industry supply

These codes are useful in informing aggregate measures, such as total industry supply. As before, some aggregation over industry codes is necessary as well. Here, aggregation includes the measures of imports per commodity, as well as total commodity output.

sup_agg_ind <- supply_table_clean %>% select(industry_code, industry_desc, naics_3digit) %>% distinct() %>% filter(is.na(naics_3digit)) 

Create Aggregate Measures

Because of the way that these databases are constructred, the sup_agg_ind dataframe above refers to commodity level summaries, while the sup_agg_com dataframe above refers to industry level summaries. We spend some time at the moment to create some commodity and industry level benchmarks.

sup_agg_coms %>% 
  filter(commodity_code %in% c("T017")) %>% 
  select(1:2)
## # A tibble: 1 × 2
##   commodity_code commodity_desc       
##   <chr>          <chr>                
## 1 T017           Total industry supply

We begin with creating benchmarks for total supply by industry.

sup_ind_agg <- supply_table_clean %>% 
  filter(commodity_code %in% c("T017")) %>% 
  select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>% 
  pivot_wider(id_cols = c(industry_code, industry_desc), names_from = commodity_code, values_from = value_num) 

colnames(sup_ind_agg) <- c("industry_code", "industry_desc", "total_industry_supply")

We now create some commodity level summaries, detailing the total commodity output, as well as total commodity imports

sup_com_agg <- supply_table_clean %>% 
  filter(industry_code %in% c("T007", "MCIF")) %>% 
  select(commodity_code, commodity_desc, industry_code, industry_desc, value_num) %>% 
  pivot_wider(id_cols = c(commodity_code, commodity_desc), names_from = industry_desc, values_from = value_num) 

colnames(sup_com_agg) <- c("commodity_code", "commodity_desc", "total_commodity_output", "total_commodity_imports")

We now have aggregate commodity and industry level summaries, and can simplify our original dataframe. We will also need to perform similar aggregation measures in the supply table (the make table?)

At this point, we might also want to separate out our commodities into some broad categories. While this categorization might be too granular, it at least allows us some baseline to assess industry use of specific types of inputs.

use_table_final <- use_table_clean %>% 
  filter(!is.na(com_code_3), !is.na(naics_3digit)) %>% 
  mutate(input_type = case_when(
    com_code_3 < 113 ~ "Agriculture and Farming", 
    com_code_3 == 113 | com_code_3 == 115 ~ "Forestry and Logging", 
    com_code_3 == 114 ~ "Fishing, Huntinng, Trapping", 
    com_code_3 == 211 ~ "Oil and Gas Extraction", 
    com_code_3 == 212 ~ "Metal Mining", 
    com_code_3 == 213 ~ "Support for Oil, Gas, and Mining", 
    com_code_3 == 221 ~ "Electricity, Gas, Water, Sewage, and other systems", 
    com_code_3 == 233 ~ "Healthcare, educational, or vocational structures",
    com_code_3 >= 300 & com_code_3 < 400 ~ "Manufactured Output", 
    com_code_3 >= 400 & com_code_3 < 460 ~ "Other wholesalers, suppliers, retailers", 
    com_code_3 >= 481 & com_code_3 < 490 ~ "Transportation", 
    com_code_3 >= 491 & com_code_3 < 493 ~ "Postal Service and other messaging", 
    com_code_3 >= 500 & com_code_3 < 600 ~ "Software, Information, Financial, Legal, and other service", 
    com_code_3 == 611 ~ "Junior colleges, colleges, universities, professional schools, and other educational services", 
    com_code_3 >= 621 & com_code_3 < 700 ~ "Hospitals, medical and diagnostic labs, and other health care services", 
    com_code_3 == 811 | com_code_3 == 230 ~ "Maintenance & Repair", 
    com_code_3 == 813 ~ "Grantmaking and Philanthropy", 
    TRUE ~ "Other Outputs (e.g. arts & recreation; accomodation, etc.)"
  )) %>%
  mutate(input_type = case_when(
    commodity_code == "423100" ~ "Motor vehicle and motor vehicle parts and supplies",
    commodity_code == "423800" ~ "Machinery, Equipment, Supplies", 
    commodity_code == "424700" ~ "Petroleum and Petroleum Products", 
    commodity_code == "541700" ~ "Scientific research and development services", 
    TRUE ~ input_type
  ))